# The earlier regex-based approach was unsatisfactory and often produced garbled text, so this version tries XPath instead.
import requests
from lxml import etree

# Listing page that is scraped when this file is run as a script.
url_base = "https://www.mee.gov.cn/xxgk2018/227/index_11252.html"

def _first_or_empty(values):
    """Return the first item of an XPath result list, or "" when it is empty."""
    return values[0] if values else ""


def download_one_page(url):
    """Fetch one listing page and print the fields of every table row.

    For each row of the first table found directly under an element whose
    class is ``iframe-list`` this prints, one value per line and in this
    order: the first ``td/span`` text, the first text node of the third
    cell, and the text and ``href`` of the link in the second cell.

    Rows that contain none of these fields (e.g. header or spacer rows) are
    skipped; a field missing from an otherwise populated row is printed as
    an empty line instead of aborting the whole listing.

    Args:
        url: Address of the listing page to download.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no matching table.
    """
    # Fetch the page source. Without an explicit timeout requests waits
    # indefinitely, so a stalled server would hang the script.
    resp = requests.get(url, timeout=30)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    # Force UTF-8 decoding of the body before reading resp.text.
    resp.encoding = "utf-8"

    # Parse the document so it can be queried with XPath.
    html = etree.HTML(resp.text)

    # Locate the information table; report a missing table explicitly rather
    # than through a bare "list index out of range".
    tables = html.xpath('//*[@class="iframe-list"]/table')
    if not tables:
        raise IndexError("no 'iframe-list' table found at {}".format(url))

    # Process every table row.
    for tr in tables[0].xpath("./tr"):
        # Same order as the printed output: date, text, title, link.
        fields = (
            tr.xpath("./td/span/text()"),
            tr.xpath("./td[3]/text()"),
            tr.xpath("./td[2]/a/text()"),
            tr.xpath("./td[2]/a/@href"),
        )
        if not any(fields):
            # Nothing to report for this row (header/spacer row).
            continue
        for values in fields:
            print(_first_or_empty(values))

# Script entry point: scrape the default listing page when run directly.
if __name__ == "__main__":
    download_one_page(url_base)